{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Methods and Attributes\n", "__Remember__\n", "* **Methods** end with **parentheses**, while **attributes** don't\n", "* `df.shape`: Attribute\n", "* `df.info()`: Method" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import pandas \n", "import pandas as pd " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# read a dataset of top-rated IMDb movies into a DataFrame\n", "movies = pd.read_csv('http://bit.ly/imdbratings')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
09.3The Shawshank RedemptionRCrime142[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...
19.2The GodfatherRCrime175[u'Marlon Brando', u'Al Pacino', u'James Caan']
29.1The Godfather: Part IIRCrime200[u'Al Pacino', u'Robert De Niro', u'Robert Duv...
39.0The Dark KnightPG-13Action152[u'Christian Bale', u'Heath Ledger', u'Aaron E...
48.9Pulp FictionRCrime154[u'John Travolta', u'Uma Thurman', u'Samuel L....
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "0 9.3 The Shawshank Redemption R Crime 142 \n", "1 9.2 The Godfather R Crime 175 \n", "2 9.1 The Godfather: Part II R Crime 200 \n", "3 9.0 The Dark Knight PG-13 Action 152 \n", "4 8.9 Pulp Fiction R Crime 154 \n", "\n", " actors_list \n", "0 [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt... \n", "1 [u'Marlon Brando', u'Al Pacino', u'James Caan'] \n", "2 [u'Al Pacino', u'Robert De Niro', u'Robert Duv... \n", "3 [u'Christian Bale', u'Heath Ledger', u'Aaron E... \n", "4 [u'John Travolta', u'Uma Thurman', u'Samuel L.... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# example method: show the first 5 rows \n", "movies.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingduration
count979.000000979.000000
mean7.889785120.979571
std0.33606926.218010
min7.40000064.000000
25%7.600000102.000000
50%7.800000117.000000
75%8.100000134.000000
max9.300000242.000000
\n", "
" ], "text/plain": [ " star_rating duration\n", "count 979.000000 979.000000\n", "mean 7.889785 120.979571\n", "std 0.336069 26.218010\n", "min 7.400000 64.000000\n", "25% 7.600000 102.000000\n", "50% 7.800000 117.000000\n", "75% 8.100000 134.000000\n", "max 9.300000 242.000000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# example method: calculate summary statistics\n", "movies.describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(979, 6)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# example attribute: number of rows and columns \n", "movies.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "star_rating float64\n", "title object\n", "content_rating object\n", "genre object\n", "duration int64\n", "actors_list object\n", "dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# example attribute: data type of each column\n", "movies.dtypes" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecontent_ratinggenreactors_list
count979976979979
unique9751216969
topTrue GritRDrama[u'Daniel Radcliffe', u'Emma Watson', u'Rupert...
freq24602786
\n", "
" ], "text/plain": [ " title content_rating genre \\\n", "count 979 976 979 \n", "unique 975 12 16 \n", "top True Grit R Drama \n", "freq 2 460 278 \n", "\n", " actors_list \n", "count 979 \n", "unique 969 \n", "top [u'Daniel Radcliffe', u'Emma Watson', u'Rupert... \n", "freq 6 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use an optional parameter to the describe method to summarize only 'object' column\n", "movies.describe(include='object')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 1, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }